library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

Read the data from GitHub

#' Get WTA or ATP tour match results for one season
#'
#' Downloads a single season's match file from the Tennis Abstract GitHub
#' repositories (https://github.com/JeffSackmann) and returns it with cleaned
#' column names plus identifying `tour` and `year` columns.
#'
#' @param year A single year, e.g. `2022`. The earliest years where there are
#'   data: wta 1920; atp 1968.
#' @param tour A string, either `"wta"` or `"atp"`.
#'
#' @return A data frame of match results, with two extra columns `tour` and
#'   `year` holding the values passed in.
get_res <- function(year, tour) {
  # Fail early with a clear message: an unknown tour would otherwise only
  # surface as an HTTP error from the download.
  if (!is.character(tour) || length(tour) != 1L ||
        !tour %in% c("wta", "atp")) {
    stop("`tour` must be either \"wta\" or \"atp\".", call. = FALSE)
  }
  # A vector of years would expand into several URLs below, so insist on one.
  if (length(year) != 1L || is.na(year)) {
    stop("`year` must be a single, non-missing year.", call. = FALSE)
  }

  # Form the file location string based on year and tour.
  file_location <- as.character(glue::glue(
    "https://raw.githubusercontent.com/JeffSackmann/tennis_{tour}/master/",
    "{tour}_matches_{year}.csv"
  ))

  # Read the csv from GitHub. These three columns are pinned to double because
  # they are otherwise guessed differently in different files, which would
  # prevent the yearly results from being row-bound. Values that cannot be
  # parsed as numbers become NA and are reported through `problems()`.
  res_year <- read_csv(
    file_location,
    col_types = cols(
      winner_seed = col_double(),
      loser_seed = col_double(),
      draw_size = col_double()
    )
  )

  res_year %>%
    # Make variable names consistent.
    janitor::clean_names() %>%
    # Add identifiers. `.env$` forces the function arguments to be used even
    # if the file happens to contain a column called `tour` or `year`.
    mutate(
      tour = .env$tour,
      year = .env$year
    )
}


# Inclusive range of seasons to download.
start_year <- 1968
end_year <- 2022

# Fetch every ATP season in the range and row-bind the yearly results into a
# single data frame. The result is printed rather than stored.
map_df(start_year:end_year, function(season) get_res(season, "atp"))
## Warning: One or more parsing issues, see `problems()` for details
## One or more parsing issues, see `problems()` for details
## One or more parsing issues, see `problems()` for details
## One or more parsing issues, see `problems()` for details
## One or more parsing issues, see `problems()` for details
#read_csv("https://raw.githubusercontent.com/JeffSackmann/tennis_atp/master/atp_matches_2022.csv")